rm(list=ls())
library(dplyr)
library(haven)
library(data.table)
library(quantreg)
library(lubridate)
library(readr)
library(openxlsx)
library(multiwayvcov)
library(lmtest)
library(texreg)
library(foreign)

CRSP_IBES <- read_dta("01_InData/CRSP_IBES_link.dta")
data2015<-data.table(read_dta("01_InData/Earn_2015_long.dta"))
data2015<-data2015[fpi=="0",]
data2015$ANNDATS<-as.Date(as.character(data2015$anndats), "%d%b%Y")
data2015$year<-year(data2015$ANNDATS)
data2015$month<-month(data2015$ANNDATS)
data2015<-subset(data2015, year>1995)
setnames(data2015, "analys", "ANALYS")

Estimid <- read_dta("01_InData/analyst_estimator.dta")
data2015_Estimid<-merge(data2015, Estimid, by=c("ANALYS", "year","month"))
data2015_Estimid$monthdate=data2015_Estimid$year*12+data2015_Estimid$month
data2015_Estimid<-rename(data2015_Estimid, "CUSIP8"="cusip")

data2015_Estimid_simple1<-subset(data2015_Estimid, year>=2005)
data2015_Estimid_simple1<-data2015_Estimid_simple1[,c("ANNDATS", "full_broker_name", "CUSIP8")]


Broker_bank<-read_dta("01_InData/SDC_brokernamelink.dta")
data2015_EstBB<-merge(data2015_Estimid_simple1, Broker_bank, by=c("full_broker_name"), allow.cartesian = TRUE)

data2015_Estimid_simple2<-subset(data2015_Estimid, year<2005)
data2015_Estimid_simple2<-data2015_Estimid_simple2[,c("ANNDATS", "full_broker_name", "CUSIP8")]



data2015_EstBB1<-merge(data2015_Estimid_simple2, Broker_bank, by=c("full_broker_name"), allow.cartesian = TRUE)
data2015_EstBB1<-rbind(data2015_EstBB, data2015_EstBB1)
rm(data2015_EstBB)
data2015_EstBB1$year<-year(data2015_EstBB1$ANNDATS)
data2015_EstBB1$month<-month(data2015_EstBB1$ANNDATS)
data2015_EstBB1$monthdate=data2015_EstBB1$year*12+data2015_EstBB1$month

data2015_EstBB1<-data2015_EstBB1[,-c(5, 6)]

SDC<-read_csv("01_InData/SDC_stocksbonds_USnation_1994_2019.csv")
SDC<-subset(SDC, SDC$ALLMGRROLE_DESCR %in% c("BOOK RUNNER", "CO MANAGER", "JOINT BOOK RUNNER", "LEAD PLACEMENT AGENT", "CO PLACEMENT AGENT", "JOINT LEAD PLACEMENT AGENT", "CO LEAD MANAGER", "JOINT LEAD MANAGER"))
SDC$date_str<-as.character(SDC$MASTER_DEAL_DATE)
SDC$year_str<-substring(SDC$date_str,1,4)
SDC$month_str<-substring(SDC$date_str,5,6)
SDC$year<-as.numeric(SDC$year_str)
SDC$month<-as.numeric(SDC$month_str)
SDC$monthdate<-SDC$year*12+SDC$month
SDC$CUSIP8 <- substring(SDC$CUSIP9, 1,8)

SDC_yes<-subset(SDC, IPO=="Yes")

IBES_SDC<-merge(data2015_EstBB1, SDC_yes, by=c("CUSIP8", "ALLMANAGERSLONG"))
IBES_SDC$md_dif<-IBES_SDC$monthdate.x-IBES_SDC$monthdate.y
IBES_SDC_IPO<-subset(IBES_SDC, md_dif>=0 & md_dif<=60)

SDC_no<-subset(SDC, IPO=="No")
SDC_no<-subset(SDC_no, SECURITY !="Gen Obl Bonds" & SECURITY !="Lease Rev Bonds" & SECURITY != "Revenue Bonds" & SECURITY != "Float Rate Nts" & SECURITY != "Senior Notes" & SECURITY != "Unsec Prom Nts")
IBES_SDC_SEO<-merge(data2015_EstBB1, SDC_no, by=c("CUSIP8", "ALLMANAGERSLONG"))
IBES_SDC_SEO$md_dif<-IBES_SDC_SEO$monthdate.x-IBES_SDC_SEO$monthdate.y
IBES_SDC_SEO<-subset(IBES_SDC_SEO, md_dif>=0 & md_dif<=24)

SDC_bonds<-subset(SDC, is.na(IPO))
IBES_SDC_bonds<-merge(data2015_EstBB1, SDC_bonds, by=c("CUSIP8", "ALLMANAGERSLONG"))
IBES_SDC_bonds$md_dif<-IBES_SDC_bonds$monthdate.x-IBES_SDC_bonds$monthdate.y
IBES_SDC_bonds<-subset(IBES_SDC_bonds, md_dif>=0 & md_dif<=12)

IBES_SDC<-rbind(IBES_SDC_bonds, IBES_SDC_SEO)
IBES_SDC<-rbind(IBES_SDC, IBES_SDC_IPO)
IBES_SDC$monthdate<-IBES_SDC$monthdate.x

IBES_unaffiliated<-anti_join(data2015_Estimid, IBES_SDC, by=c("full_broker_name", "CUSIP8", "monthdate"))

X<-as.data.table(na.omit(as.data.table(IBES_unaffiliated),cols="ticker"))

IBES_CRSP<-data.table(read.csv("01_InData/IBES_CRSP_link_table.csv"))
X<-X[order(ANALYS, ticker, ANNDATS),]
X<-X[, max_anntims :=max(anntims),by=c("ANALYS", "ticker","ANNDATS")]
X<-X[anntims==max_anntims,]


Recs<- data.table(read.csv("01_InData/recs.csv"))
Recs <- Recs[order(AMASKCD,TICKER, ANNDATS)]
Recs<-Recs[, ANNDATS:=as.Date(as.character(ANNDATS), "%m/%d/%Y")]

Recs <- Recs[order(AMASKCD,TICKER, ANNDATS, ANNTIMS)]
Recs<-Recs[, max_anntims :=max(ANNTIMS),by=c("AMASKCD", "TICKER","ANNDATS")]
Recs<-Recs[ANNTIMS==max_anntims,]
Recs<-Recs[, c("TICKER", "IRECCD", "AMASKCD", "ANNDATS")]

X_Recs<-
  merge(
    X,
    Recs,
    by.x=c("ANALYS", "ticker"),
    by.y=c("AMASKCD", "TICKER"),
    allow.cartesian=TRUE
  )

setnames(X_Recs, "ANNDATS.y", "ANNDATS")
setnames(X_Recs, "ANNDATS.x", "anndats_revision")

X_Recs<-X_Recs[ANNDATS<anndats_revision,]
setnames(X_Recs, "ticker", "TICKER")
X_Recs<-X_Recs[, max_ANNDATS :=max(ANNDATS),by=c("ANALYS", "TICKER", "anndats_revision")]
X_Recs<-X_Recs[max_ANNDATS==ANNDATS,]

X_Recs<-X_Recs[, anndats_year := format(anndats_revision, "%Y")]

X_Recs<-X_Recs[IRECCD<3, good := 1]
X_Recs<-X_Recs[IRECCD>3, bad := 1]
X_Recs<-X_Recs[IRECCD==3, neutral := 1]

X_Recs<-unique(X_Recs, by=c("ANALYS","TICKER","anndats_revision"))


X_Recs <- X_Recs[order(ANALYS, TICKER, anndats_revision)]
X_Recs[, lag1_value := shift(value, n = 1, type = 'lag'), by = list(ANALYS, TICKER, fpedats)]
X_Recs[, revision := value-lag1_value]

X_Recs<-X_Recs[(revision!=0 & !is.na(revision)),]


Actuals <- data.table(read.csv("01_InData/actuals.csv"))
Actuals[, FPEDATS:=PENDS]

Actuals<-Actuals[PDICITY=="ANN",]

Actuals<-unique(Actuals, by=c("TICKER","FPEDATS"))

Actuals[, actual:=VALUE]
Actuals<-Actuals[order(TICKER, FPEDATS)]


Actuals<-
  merge(
    Actuals,
    IBES_CRSP,
    by=c("TICKER")
  )
Actuals<-Actuals[ANNDATS>=sdate & ANNDATS<=edate,]
Actuals[, ANNDATS := as.Date(as.character(ANNDATS), "%Y%m%d")]
Actuals[, FPEDATS := as.Date(as.character(FPEDATS),"%Y%m%d")]
Actuals[, year := format(FPEDATS, "%Y")]
Actuals<-na.omit(Actuals, cols="PERMNO")

CRSP_daily <- data.table(read.csv("01_InData/crsp.csv"))
CRSP_daily[, date := as.Date(as.character(date), "%Y%m%d")]

Actuals<-
  merge(
    Actuals,
    CRSP_daily,
    by.x=c("PERMNO","ANNDATS"),
    by.y=c("PERMNO", "date"),
    all.x=TRUE
  )
Actuals[, actual_adj:=actual/CFACSHR]

Actuals<-Actuals[order(TICKER, FPEDATS)]


for (v in (3:5)){
  cmdstr1<-paste0("Actuals[, lead",v,"_actual := shift(actual_adj, n=",v,",type='lead'),by=list(TICKER)]")
  eval(parse(text=cmdstr1))
  cmdstr1<-paste0("Actuals[, ltg_",v," := (((lead",v,"_actual-actual_adj)/abs(actual_adj)+1)^(1/",v,")-1)*100]")
  eval(parse(text=cmdstr1))
}

Actuals<-unique(Actuals, by=c("TICKER","year"))

Forecasts_act<-
  merge(
    X_Recs,
    Actuals,
    by.x = c("TICKER", "anndats_year"),
    by.y = c("TICKER", "year")
  )


for (v in (3:5)){
  cmdstr1<-paste0("Forecasts_act<-Forecasts_act[, forecast_error", v,"_:= ltg_", v,"-value]")
  eval(parse(text=cmdstr1))
}
Forecasts_act<-Forecasts_act[, c(1:5,15,18,25,30,32,35:37,39,63:65)]

write_dta(Forecasts_act, path="02_OutData/forecasts_act_ltg_ind_unaff.dta")


